Load the required packages and set a seed for the random number generator.
# Load the required packages.
library(tidyverse)
library(rstan)
# Use all available cores, but at least 4. parallel::detectCores() can
# return NA on some platforms; without na.rm, max(NA, 4) would set
# mc.cores to NA and break parallel sampling.
options(mc.cores = max(4, parallel::detectCores(), na.rm = TRUE))
# Save a compiled version of the Stan model file (avoids recompilation).
rstan_options(auto_write = TRUE)
library(brms)
# install faintr with devtools::install_github('michael-franke/bayes_mixed_regression_tutorial/faintr', build_vignettes = TRUE)
library(faintr)
# Fix the RNG seed for reproducibility of jittered plots and sampling.
set.seed(123)
Read the data:
raw_data = read_csv("results.csv")
## Warning: 1470 parsing failures.
## row col expected actual file
## 7106 comments 1/0/T/F/TRUE/FALSE I only assigned values between 2 and 5 because I expected nicer and uglier pictures to come. My values are not correct as absolute ratings. They are influenced by relative comarision to what appeared immediately before. 'results.csv'
## 7107 comments 1/0/T/F/TRUE/FALSE I only assigned values between 2 and 5 because I expected nicer and uglier pictures to come. My values are not correct as absolute ratings. They are influenced by relative comarision to what appeared immediately before. 'results.csv'
## 7108 comments 1/0/T/F/TRUE/FALSE I only assigned values between 2 and 5 because I expected nicer and uglier pictures to come. My values are not correct as absolute ratings. They are influenced by relative comarision to what appeared immediately before. 'results.csv'
## 7109 comments 1/0/T/F/TRUE/FALSE I only assigned values between 2 and 5 because I expected nicer and uglier pictures to come. My values are not correct as absolute ratings. They are influenced by relative comarision to what appeared immediately before. 'results.csv'
## 7110 comments 1/0/T/F/TRUE/FALSE I only assigned values between 2 and 5 because I expected nicer and uglier pictures to come. My values are not correct as absolute ratings. They are influenced by relative comarision to what appeared immediately before. 'results.csv'
## .... ........ .................. ........................................................................................................................................................................................................................... .............
## See problems(...) for more details.
glimpse(raw_data)
## Observations: 27,685
## Variables: 29
## $ submission_id <dbl> 605, 605, 605, 605, 605, 605, 605, 605, 605, 605, …
## $ QUD <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ RT <dbl> 15344, 8314, 65189, 10070, 5679, 2714, 4583, 2000,…
## $ age <dbl> 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23…
## $ artist <chr> NA, NA, NA, "P", "B", "G", "G", "P", "P", "P", "G"…
## $ color <chr> NA, NA, NA, "colored", "monochrome", "colored", "c…
## $ color2 <chr> NA, NA, NA, "sepia", "monochrome", "colored", "col…
## $ comments <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ date <dbl> NA, NA, NA, 1910, 1911, 1915, 1912, 1911, 1911, 19…
## $ education <chr> "Graduated College", "Graduated College", "Graduat…
## $ endTime <dbl> 1.563018e+12, 1.563018e+12, 1.563018e+12, 1.563018…
## $ expected <chr> "74", "7", "FELOPZD", NA, NA, NA, NA, NA, NA, NA, …
## $ experiment_id <dbl> 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77…
## $ gender <chr> "female", "female", "female", "female", "female", …
## $ languages <chr> "German", "German", "German", "German", "German", …
## $ min_chars <dbl> 0, 0, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ option1 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ option2 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ optionLeft <chr> NA, NA, NA, "not at all", "not at all", "not at al…
## $ optionRight <chr> NA, NA, NA, "very", "very", "very", "very", "very"…
## $ picture <chr> "vision/ishihara.png", "vision/ishihara2.jpg", "vi…
## $ pictureNumber <dbl> 0, 0, 0, 32, 65, 75, 84, 56, 71, 120, 89, 46, 16, …
## $ question <chr> "Which number do you see here?", "Which number do …
## $ response <chr> "74", "7", "FELOPZD", "4", "5", "3", "4", "3", "3"…
## $ startDate <chr> "Sat Jul 13 2019 13:21:37 GMT+0200 (CEST)", "Sat J…
## $ startTime <dbl> 1.563017e+12, 1.563017e+12, 1.563017e+12, 1.563017…
## $ timeSpent <dbl> 13.91613, 13.91613, 13.91613, 13.91613, 13.91613, …
## $ trial_name <chr> "colourVisionTest1", "colourVisionTest2", "visionT…
## $ trial_number <dbl> 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13…
Rename variables
# Harmonise the snake_case column names to the camelCase used in this script.
data <- raw_data %>%
  rename(submissionID = submission_id,
         trialName    = trial_name)
glimpse(data)
## Observations: 27,685
## Variables: 29
## $ submissionID <dbl> 605, 605, 605, 605, 605, 605, 605, 605, 605, 605, …
## $ QUD <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ RT <dbl> 15344, 8314, 65189, 10070, 5679, 2714, 4583, 2000,…
## $ age <dbl> 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23…
## $ artist <chr> NA, NA, NA, "P", "B", "G", "G", "P", "P", "P", "G"…
## $ color <chr> NA, NA, NA, "colored", "monochrome", "colored", "c…
## $ color2 <chr> NA, NA, NA, "sepia", "monochrome", "colored", "col…
## $ comments <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ date <dbl> NA, NA, NA, 1910, 1911, 1915, 1912, 1911, 1911, 19…
## $ education <chr> "Graduated College", "Graduated College", "Graduat…
## $ endTime <dbl> 1.563018e+12, 1.563018e+12, 1.563018e+12, 1.563018…
## $ expected <chr> "74", "7", "FELOPZD", NA, NA, NA, NA, NA, NA, NA, …
## $ experiment_id <dbl> 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77…
## $ gender <chr> "female", "female", "female", "female", "female", …
## $ languages <chr> "German", "German", "German", "German", "German", …
## $ min_chars <dbl> 0, 0, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ option1 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ option2 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ optionLeft <chr> NA, NA, NA, "not at all", "not at all", "not at al…
## $ optionRight <chr> NA, NA, NA, "very", "very", "very", "very", "very"…
## $ picture <chr> "vision/ishihara.png", "vision/ishihara2.jpg", "vi…
## $ pictureNumber <dbl> 0, 0, 0, 32, 65, 75, 84, 56, 71, 120, 89, 46, 16, …
## $ question <chr> "Which number do you see here?", "Which number do …
## $ response <chr> "74", "7", "FELOPZD", "4", "5", "3", "4", "3", "3"…
## $ startDate <chr> "Sat Jul 13 2019 13:21:37 GMT+0200 (CEST)", "Sat J…
## $ startTime <dbl> 1.563017e+12, 1.563017e+12, 1.563017e+12, 1.563017…
## $ timeSpent <dbl> 13.91613, 13.91613, 13.91613, 13.91613, 13.91613, …
## $ trialName <chr> "colourVisionTest1", "colourVisionTest2", "visionT…
## $ trial_number <dbl> 1, 1, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13…
Select only the variables that are important for the analysis:
# Keep only the columns needed for the analysis below.
data <- data %>%
  select(submissionID, trialName, response, pictureNumber,
         artist, date, age, gender, timeSpent, color, color2)
glimpse(data)
## Observations: 27,685
## Variables: 11
## $ submissionID <dbl> 605, 605, 605, 605, 605, 605, 605, 605, 605, 605, …
## $ trialName <chr> "colourVisionTest1", "colourVisionTest2", "visionT…
## $ response <chr> "74", "7", "FELOPZD", "4", "5", "3", "4", "3", "3"…
## $ pictureNumber <dbl> 0, 0, 0, 32, 65, 75, 84, 56, 71, 120, 89, 46, 16, …
## $ artist <chr> NA, NA, NA, "P", "B", "G", "G", "P", "P", "P", "G"…
## $ date <dbl> NA, NA, NA, 1910, 1911, 1915, 1912, 1911, 1911, 19…
## $ age <dbl> 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23…
## $ gender <chr> "female", "female", "female", "female", "female", …
## $ timeSpent <dbl> 13.91613, 13.91613, 13.91613, 13.91613, 13.91613, …
## $ color <chr> NA, NA, NA, "colored", "monochrome", "colored", "c…
## $ color2 <chr> NA, NA, NA, "sepia", "monochrome", "colored", "col…
Exclude participants whose data should not enter the analysis, and record why each participant was excluded.
# Exclude invalid participants and count the reason for each exclusion.
# The data is assumed to contain exactly 245 rows per participant, in
# submission order: rows 1-3 are the vision tests, row 244 the cubism
# expertise question, row 245 the XPLab participation question.
# NOTE(review): this positional indexing breaks silently if any participant
# has a different number of rows -- confirm against the experiment design.
data_ex <- data
fehlerCV1 <- 0  # failed colour vision test 1 (Ishihara, expected "74")
fehlerCV2 <- 0  # failed colour vision test 2 (Ishihara, expected "7")
fehlerV   <- 0  # failed the letter vision test (expected "FELOPZD")
fehlerExp <- 0  # self-rated cubism expertise above 5
fehlerT   <- 0  # finished too fast (timeSpent < 5)
fehlerXP  <- 0  # took part in the Experimental Psychology Lab course
for (i in seq_len(nrow(data) / 245)) {
  base <- (i - 1) * 245
  # Each participant is counted for the FIRST criterion they fail only
  # (else-if chain), so the six counters are disjoint. isTRUE() guards
  # against NA responses, which would otherwise error inside if().
  if (isTRUE(data[base + 1, ]$response != "74")) {
    data_ex <- filter(data_ex, submissionID != data[base + 1, ]$submissionID)
    fehlerCV1 <- fehlerCV1 + 1
  } else if (isTRUE(data[base + 2, ]$response != "7")) {
    data_ex <- filter(data_ex, submissionID != data[base + 2, ]$submissionID)
    fehlerCV2 <- fehlerCV2 + 1
  } else if (isTRUE(data[base + 3, ]$response != "FELOPZD")) {
    data_ex <- filter(data_ex, submissionID != data[base + 3, ]$submissionID)
    fehlerV <- fehlerV + 1
  } else if (isTRUE(as.integer(data[base + 244, ]$response) > 5)) {
    data_ex <- filter(data_ex, submissionID != data[base + 244, ]$submissionID)
    fehlerExp <- fehlerExp + 1
  } else if (isTRUE(data[base + 1, ]$timeSpent < 5)) {
    data_ex <- filter(data_ex, submissionID != data[base + 1, ]$submissionID)
    fehlerT <- fehlerT + 1
  } else if (isTRUE(data[base + 245, ]$response == "Yes")) {
    data_ex <- filter(data_ex, submissionID != data[base + 245, ]$submissionID)
    fehlerXP <- fehlerXP + 1
  }
}
glimpse(data_ex)
## Observations: 15,680
## Variables: 11
## $ submissionID <dbl> 605, 605, 605, 605, 605, 605, 605, 605, 605, 605, …
## $ trialName <chr> "colourVisionTest1", "colourVisionTest2", "visionT…
## $ response <chr> "74", "7", "FELOPZD", "4", "5", "3", "4", "3", "3"…
## $ pictureNumber <dbl> 0, 0, 0, 32, 65, 75, 84, 56, 71, 120, 89, 46, 16, …
## $ artist <chr> NA, NA, NA, "P", "B", "G", "G", "P", "P", "P", "G"…
## $ date <dbl> NA, NA, NA, 1910, 1911, 1915, 1912, 1911, 1911, 19…
## $ age <dbl> 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23…
## $ gender <chr> "female", "female", "female", "female", "female", …
## $ timeSpent <dbl> 13.91613, 13.91613, 13.91613, 13.91613, 13.91613, …
## $ color <chr> NA, NA, NA, "colored", "monochrome", "colored", "c…
## $ color2 <chr> NA, NA, NA, "sepia", "monochrome", "colored", "col…
print(c("Number of excluded participants because of colorVisionTest1: ", fehlerCV1))
## [1] "Number of excluded participants because of colorVisionTest1: "
## [2] "18"
print(c("Number of excluded participants because of colorVisionTest2: ", fehlerCV2))
## [1] "Number of excluded participants because of colorVisionTest2: "
## [2] "2"
print(c("Number of excluded participants because of VisionTest: ", fehlerV))
## [1] "Number of excluded participants because of VisionTest: "
## [2] "24"
print(c("Number of excluded participants because of their expertise level in cubism: ", fehlerExp))
## [1] "Number of excluded participants because of their expertise level in cubism: "
## [2] "0"
print(c("Number of excluded participants because of TimeSpent: ", fehlerT))
## [1] "Number of excluded participants because of TimeSpent: "
## [2] "0"
print(c("Number of excluded participants because of participation in XPLab: ", fehlerXP))
## [1] "Number of excluded participants because of participation in XPLab: "
## [2] "5"
Calculate age mean, age range and gender distribution, number of participants:
data_ex %>% summarise(meanAge = mean(na.exclude(age)))
## # A tibble: 1 x 1
## meanAge
## <dbl>
## 1 29.3
# Row counts per gender; each participant contributes exactly 245 rows,
# so dividing by 245 converts row counts into participant counts.
gender_distribution <- data_ex %>%
  count(gender) %>%
  mutate(n = n / 245)
print(c("Age range: ", range(na.exclude(data_ex$age))))
## [1] "Age range: " "11" "81"
# Number of participants before and after exclusion (245 rows per person).
participants <- nrow(data) / 245
participants_valid <- nrow(data_ex) / 245
# Gender distribution in participant counts.
print(gender_distribution)
## # A tibble: 2 x 2
## gender n
## <chr> <dbl>
## 1 female 40
## 2 male 24
print(c("Total number of participants: ", participants))
## [1] "Total number of participants: " "113"
print(c("Number of valid participants: ", participants_valid))
## [1] "Number of valid participants: " "64"
Transform the data into the right form:
# Reshape to one row per participant x picture: the two rating trials
# ("ratingScaleLike", "ratingScaleDetect") become separate numeric columns.
# pivot_wider() replaces the superseded spread().
d <- data_ex %>%
  filter(trialName %in% c("ratingScaleLike", "ratingScaleDetect")) %>%
  select(submissionID, trialName, pictureNumber, artist, date,
         response, color, color2) %>%
  pivot_wider(names_from = trialName, values_from = response) %>%
  mutate(
    ratingScaleLike   = as.numeric(ratingScaleLike),
    ratingScaleDetect = as.numeric(ratingScaleDetect)
  ) %>%
  rename(detectability = ratingScaleDetect, liking = ratingScaleLike)
glimpse(d)
## Observations: 7,680
## Variables: 8
## $ submissionID <dbl> 551, 551, 551, 551, 551, 551, 551, 551, 551, 551, …
## $ pictureNumber <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ artist <chr> "B", "B", "B", "B", "B", "B", "B", "B", "B", "P", …
## $ date <dbl> 1909, 1909, 1910, 1910, 1910, 1910, 1913, 1909, 19…
## $ color <chr> "colored", "colored", "colored", "colored", "color…
## $ color2 <chr> "colored", "sepia", "sepia", "sepia", "colored", "…
## $ detectability <dbl> 6, 5, 4, 2, 3, 2, 2, 6, 2, 2, 5, 2, 4, 4, 7, 2, 1,…
## $ liking <dbl> 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 1, 2, 2, 2, 3, 3, 4,…
Check that everything is there and of the correct type:
# d already is a tibble after the dplyr pipeline, so as_tibble() is a safe
# no-op here; printing shows the column types for a final sanity check.
d_tibble <- as_tibble(d)
print(d_tibble)
## # A tibble: 7,680 x 8
## submissionID pictureNumber artist date color color2 detectability
## <dbl> <dbl> <chr> <dbl> <chr> <chr> <dbl>
## 1 551 1 B 1909 colo… color… 6
## 2 551 2 B 1909 colo… sepia 5
## 3 551 3 B 1910 colo… sepia 4
## 4 551 4 B 1910 colo… sepia 2
## 5 551 5 B 1910 colo… color… 3
## 6 551 6 B 1910 colo… sepia 2
## 7 551 7 B 1913 colo… color… 2
## 8 551 8 B 1909 colo… color… 6
## 9 551 9 B 1911 colo… sepia 2
## 10 551 10 P 1912 colo… color… 2
## # … with 7,670 more rows, and 1 more variable: liking <dbl>
Inspect the data with x = detectability and y = liking for the different participants. This part is just for our own interest.
ggplot(d, aes(x = detectability, y = liking, group = submissionID)) + geom_jitter() + geom_smooth(method = "lm") + labs(x = "Detectability", y = "Liking")
Inspect the data with x = detectability and y = liking:
# Overall liking vs. detectability: jittered and plain scatter versions,
# each with a single linear trend line.
ggplot(d, aes(x = detectability, y = liking)) +
  geom_jitter() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking")
ggplot(d, aes(x = detectability, y = liking)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking")
Boxplots that show liking and detectability
# Liking distribution per detectability level, with and without the raw
# jittered points behind the boxplots.
ggplot(d, aes(x = detectability, y = liking, group = detectability)) +
  geom_jitter() +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking")
ggplot(d, aes(x = detectability, y = liking, group = detectability)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking")
calculate means for detectability:
d %>% group_by(liking) %>% summarise(meanDetect = mean(detectability))
## # A tibble: 7 x 2
## liking meanDetect
## <dbl> <dbl>
## 1 1 2.68
## 2 2 2.88
## 3 3 3.37
## 4 4 3.74
## 5 5 4.12
## 6 6 4.28
## 7 7 5.13
calculate means for liking:
d %>% group_by(detectability) %>% summarise(meanLike = mean(liking))
## # A tibble: 7 x 2
## detectability meanLike
## <dbl> <dbl>
## 1 1 2.27
## 2 2 2.75
## 3 3 2.95
## 4 4 3.23
## 5 5 3.40
## 6 6 3.51
## 7 7 3.49
calculate means for different artists:
d %>% group_by(artist) %>% summarise(meanLike = mean(liking), meanDetect = mean(detectability))
## # A tibble: 3 x 3
## artist meanLike meanDetect
## <chr> <dbl> <dbl>
## 1 B 2.71 2.69
## 2 G 3.36 4.78
## 3 P 2.74 2.53
Boxplots and graphs that show liking and detectability (for the different artists)
# Per-artist subsets (B = Braque, P = Picasso, G = Gris), then for each:
# a boxplot per detectability level and a scatter plot with a linear trend.
dataB <- filter(d, artist == "B")
dataP <- filter(d, artist == "P")
dataG <- filter(d, artist == "G")
ggplot(dataB, aes(x = detectability, y = liking, group = detectability)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Braque")
ggplot(dataP, aes(x = detectability, y = liking, group = detectability)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Picasso")
ggplot(dataG, aes(x = detectability, y = liking, group = detectability)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Gris")
ggplot(dataB, aes(x = detectability, y = liking)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Braque")
ggplot(dataP, aes(x = detectability, y = liking)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Picasso")
ggplot(dataG, aes(x = detectability, y = liking)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Gris")
Graphs / boxplots that show liking and detectability (for the different artists in one graph / boxplot)
# All artists in one plot. Map colour in addition to fill: the default point
# shape ignores the fill aesthetic, so with fill alone all points look
# identical; fill still colours the smooth's confidence ribbon and boxplots.
ggplot(d, aes(x = detectability, y = liking, colour = artist, fill = artist)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Different artists",
       fill = "Artist", colour = "Artist")
ggplot(d, aes(x = as.factor(detectability), y = liking, fill = artist)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Different artists",
       fill = "Artist")
Graphs / boxplots that show liking and detectability (for the color vs. monochrome in one graph / boxplot)
# Colored vs. monochrome in one plot. Map colour in addition to fill so the
# points are distinguishable (the default point shape ignores fill).
ggplot(d, aes(x = detectability, y = liking, colour = color, fill = color)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Different colors",
       fill = "Color", colour = "Color")
ggplot(d, aes(x = as.factor(detectability), y = liking, fill = color)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Different colors",
       fill = "Color")
Graphs / boxplots that show liking and detectability (for the color vs. monochrome vs. sepia in one graph / boxplot)
# Colored vs. monochrome vs. sepia in one plot. Map colour in addition to
# fill so the points are distinguishable (the default point shape ignores
# fill).
ggplot(d, aes(x = detectability, y = liking, colour = color2, fill = color2)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Different colors",
       fill = "Color", colour = "Color")
ggplot(d, aes(x = as.factor(detectability), y = liking, fill = color2)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Different colors",
       fill = "Color")
calculate means for different colors:
d %>% group_by(color2) %>% summarise(meanLike = mean(liking))
## # A tibble: 3 x 2
## color2 meanLike
## <chr> <dbl>
## 1 colored 3.31
## 2 monochrome 2.77
## 3 sepia 2.73
Boxplots and graphs that show liking and detectability (for the different time spans)
# Split the paintings into three time periods by creation date, then plot
# boxplots and linear trends for each period.
d_early <- filter(d, date <= 1911)
d_mid   <- filter(d, date >= 1912, date <= 1915)
d_late  <- filter(d, date >= 1916)
ggplot(d_early, aes(x = detectability, y = liking, group = detectability)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Paintings from 1908-1911")
ggplot(d_mid, aes(x = detectability, y = liking, group = detectability)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Paintings from 1912-1915")
ggplot(d_late, aes(x = detectability, y = liking, group = detectability)) +
  geom_boxplot() +
  labs(x = "Detectability", y = "Liking", title = "Paintings from 1916-1919")
ggplot(d_early, aes(x = detectability, y = liking)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Paintings from 1908-1911")
ggplot(d_mid, aes(x = detectability, y = liking)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Paintings from 1912-1915")
ggplot(d_late, aes(x = detectability, y = liking)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Detectability", y = "Liking", title = "Paintings from 1916-1919")
prepare the data for the analysis:
# Convert the rating scales to ordered factors (as required by the ordinal
# cumulative models below) and the design variables to unordered factors.
# TRUE is spelled out instead of T, which is reassignable.
d <- mutate(d,
  liking        = factor(liking, ordered = TRUE),
  detectability = factor(detectability, ordered = TRUE),
  color         = factor(color),
  color2        = factor(color2),
  artist        = factor(artist)
)
glimpse(d)
## Observations: 7,680
## Variables: 8
## $ submissionID <dbl> 551, 551, 551, 551, 551, 551, 551, 551, 551, 551, …
## $ pictureNumber <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ artist <fct> B, B, B, B, B, B, B, B, B, P, B, P, P, P, P, P, P,…
## $ date <dbl> 1909, 1909, 1910, 1910, 1910, 1910, 1913, 1909, 19…
## $ color <fct> colored, colored, colored, colored, colored, color…
## $ color2 <fct> colored, sepia, sepia, sepia, colored, sepia, colo…
## $ detectability <ord> 6, 5, 4, 2, 3, 2, 2, 6, 2, 2, 5, 2, 4, 4, 7, 2, 1,…
## $ liking <ord> 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 1, 2, 2, 2, 3, 3, 4,…
ANALYSIS
Pearson correlation of liking and detectability:
cor(as.numeric(d$liking), as.numeric(d$detectability))
## [1] 0.2828698
M1. simple analysis: Treat the detectability scale as interval / numeric. Calculate the difference of the sample points: compare the sample points for each category of detectability (2 to 7) to the sample points from detectability = 1.
# M1: cumulative-logit ordinal regression of liking on detectability,
# treating the 1-7 detectability scale as an interval/numeric predictor.
m1 <- brm(
  liking ~ as.numeric(detectability),
  data = d,
  family = cumulative("logit")
)
print(m1)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: liking ~ as.numeric(detectability)
## Data: d (Number of observations: 7680)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample
## Intercept[1] -0.67 0.04 -0.75 -0.58 5345
## Intercept[2] 0.61 0.04 0.53 0.69 5711
## Intercept[3] 1.70 0.05 1.61 1.79 5131
## Intercept[4] 2.60 0.05 2.50 2.71 5098
## Intercept[5] 3.61 0.06 3.49 3.74 5020
## Intercept[6] 4.80 0.09 4.63 4.97 5613
## as.numericdetectability 0.27 0.01 0.25 0.29 5109
## Rhat
## Intercept[1] 1.00
## Intercept[2] 1.00
## Intercept[3] 1.00
## Intercept[4] 1.00
## Intercept[5] 1.00
## Intercept[6] 1.00
## as.numericdetectability 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Posterior predictions per response category (the recommended view for
# ordinal families).
marginal_effects(m1, categorical = T)
# NOTE(review): categorical = F treats the ordinal response as continuous
# and triggers the warning below -- kept only as a rough summary view.
marginal_effects(m1, categorical = F)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
M2: Model that treats detectability as monotonic and includes random effects
# M2: cumulative ordinal model with detectability as a monotonic predictor
# and a by-participant random intercept.
m2 <- brm(
  liking ~ mo(detectability) + (1 | submissionID),
  data = d,
  family = cumulative()
)
print(m2)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: liking ~ mo(detectability) + (1 | submissionID)
## Data: d (Number of observations: 7680)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~submissionID (Number of levels: 64)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.36 0.14 1.12 1.66 310 1.02
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -1.12 0.18 -1.47 -0.78 119 1.02
## Intercept[2] 0.54 0.18 0.20 0.87 121 1.02
## Intercept[3] 1.89 0.18 1.55 2.23 122 1.02
## Intercept[4] 2.94 0.18 2.59 3.28 125 1.02
## Intercept[5] 4.04 0.18 3.69 4.39 132 1.02
## Intercept[6] 5.27 0.20 4.89 5.64 151 1.01
## modetectability 1.66 0.09 1.48 1.84 2688 1.00
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## modetectability1[1] 0.26 0.04 0.18 0.33 4003 1.00
## modetectability1[2] 0.15 0.04 0.06 0.24 3117 1.00
## modetectability1[3] 0.19 0.05 0.09 0.29 3339 1.00
## modetectability1[4] 0.17 0.06 0.06 0.28 2599 1.00
## modetectability1[5] 0.12 0.05 0.02 0.23 3396 1.00
## modetectability1[6] 0.12 0.05 0.02 0.22 2722 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Posterior predictions per response category (the recommended view for
# ordinal families).
marginal_effects(m2, categorical = T)
# NOTE(review): categorical = F treats the ordinal response as continuous
# and triggers the warning below -- kept only as a rough summary view.
marginal_effects(m2, categorical = F)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
M3: Model that treats detectability as monotonic
# M3: like M2 but without random effects -- monotonic detectability only.
m3 <- brm(
  liking ~ mo(detectability),
  data = d,
  family = cumulative()
)
print(m3)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: liking ~ mo(detectability)
## Data: d (Number of observations: 7680)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -0.66 0.05 -0.75 -0.58 3859 1.00
## Intercept[2] 0.64 0.05 0.55 0.73 4403 1.00
## Intercept[3] 1.74 0.05 1.64 1.83 4752 1.00
## Intercept[4] 2.64 0.05 2.53 2.75 4513 1.00
## Intercept[5] 3.64 0.06 3.51 3.77 5365 1.00
## Intercept[6] 4.82 0.09 4.64 5.00 5827 1.00
## modetectability 1.59 0.07 1.46 1.72 3986 1.00
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## modetectability1[1] 0.45 0.03 0.38 0.51 4954 1.00
## modetectability1[2] 0.17 0.04 0.09 0.25 4051 1.00
## modetectability1[3] 0.20 0.05 0.11 0.29 5081 1.00
## modetectability1[4] 0.10 0.05 0.01 0.19 5331 1.00
## modetectability1[5] 0.05 0.03 0.00 0.13 5872 1.00
## modetectability1[6] 0.03 0.02 0.00 0.08 5289 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Posterior predictions per response category (the recommended view for
# ordinal families).
marginal_effects(m3, categorical = T)
# NOTE(review): categorical = F treats the ordinal response as continuous
# and triggers the warning below -- kept only as a rough summary view.
marginal_effects(m3, categorical = F)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
M4: Model that additionally includes artist and color as independent variables and includes random effects
# M4: monotonic detectability plus artist and colour condition as fixed
# effects, with a by-participant random intercept.
m4 <- brm(
  liking ~ mo(detectability) + artist + color2 + (1 | submissionID),
  data = d,
  family = cumulative()
)
print(m4)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: liking ~ mo(detectability) + artist + color2 + (1 | submissionID)
## Data: d (Number of observations: 7680)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Group-Level Effects:
## ~submissionID (Number of levels: 64)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 1.42 0.13 1.18 1.70 275 1.03
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -1.57 0.19 -1.93 -1.20 134 1.03
## Intercept[2] 0.14 0.18 -0.23 0.50 132 1.03
## Intercept[3] 1.52 0.19 1.16 1.89 133 1.03
## Intercept[4] 2.60 0.19 2.23 2.97 136 1.03
## Intercept[5] 3.72 0.19 3.35 4.09 140 1.03
## Intercept[6] 4.96 0.20 4.57 5.36 156 1.03
## artistG 0.50 0.06 0.38 0.63 2463 1.00
## artistP 0.02 0.05 -0.09 0.12 3196 1.00
## color2monochrome -0.66 0.05 -0.76 -0.57 3565 1.00
## color2sepia -0.32 0.07 -0.45 -0.19 3381 1.00
## modetectability 1.20 0.10 1.01 1.40 2548 1.00
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## modetectability1[1] 0.33 0.05 0.23 0.43 3997 1.00
## modetectability1[2] 0.16 0.06 0.04 0.28 2659 1.00
## modetectability1[3] 0.16 0.07 0.03 0.29 1966 1.00
## modetectability1[4] 0.15 0.07 0.03 0.29 2814 1.00
## modetectability1[5] 0.09 0.06 0.01 0.22 4116 1.00
## modetectability1[6] 0.11 0.06 0.01 0.24 3210 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Posterior predictions per response category (the recommended view for
# ordinal families).
marginal_effects(m4, categorical = T)
# NOTE(review): categorical = F treats the ordinal response as continuous
# and triggers the warnings below -- kept only as a rough summary view.
marginal_effects(m4, categorical = F)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
M5: Like M4, but without random effects
# M5: like M4 but without the by-participant random intercept.
m5 <- brm(
  liking ~ mo(detectability) + artist + color2,
  data = d,
  family = cumulative()
)
print(m5)
## Family: cumulative
## Links: mu = logit; disc = identity
## Formula: liking ~ mo(detectability) + artist + color2
## Data: d (Number of observations: 7680)
## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup samples = 4000
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1] -0.93 0.07 -1.07 -0.80 3802 1.00
## Intercept[2] 0.39 0.07 0.25 0.52 3991 1.00
## Intercept[3] 1.50 0.07 1.36 1.64 4144 1.00
## Intercept[4] 2.41 0.07 2.27 2.56 4239 1.00
## Intercept[5] 3.43 0.08 3.27 3.58 4494 1.00
## Intercept[6] 4.61 0.10 4.41 4.81 5160 1.00
## artistG 0.27 0.06 0.16 0.39 3391 1.00
## artistP 0.04 0.05 -0.06 0.15 4877 1.00
## color2monochrome -0.51 0.05 -0.60 -0.42 4864 1.00
## color2sepia -0.22 0.06 -0.34 -0.09 4558 1.00
## modetectability 1.41 0.07 1.28 1.55 3602 1.00
##
## Simplex Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## modetectability1[1] 0.51 0.04 0.43 0.58 4514 1.00
## modetectability1[2] 0.18 0.05 0.09 0.27 4495 1.00
## modetectability1[3] 0.18 0.05 0.07 0.28 4733 1.00
## modetectability1[4] 0.08 0.05 0.00 0.17 3912 1.00
## modetectability1[5] 0.04 0.03 0.00 0.11 4727 1.00
## modetectability1[6] 0.02 0.02 0.00 0.08 5111 1.00
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Posterior predictions per response category (the recommended view for
# ordinal families).
marginal_effects(m5, categorical = T)
# NOTE(review): categorical = F treats the ordinal response as continuous
# and triggers the warnings below -- kept only as a rough summary view.
marginal_effects(m5, categorical = F)
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
## Warning: Predictions are treated as continuous variables in
## 'marginal_effects' by default, which is likely invalid for ordinal
## families. Please set 'categorical' to TRUE.
Save current workspace to be able to compare which model fits best afterwards
save.image("models.RData")
Analyse which model fits our data best
# Restore the saved models (a no-op when run directly after save.image()
# in the same session).
load("models.RData")
# Attach the WAIC criterion to each fitted model.
m1 <- add_criterion(m1, "waic")
m2 <- add_criterion(m2, "waic")
m3 <- add_criterion(m3, "waic")
m4 <- add_criterion(m4, "waic")
m5 <- add_criterion(m5, "waic")
# Rank the models by expected log predictive density; the best model is on
# top with elpd_diff = 0 (here m4: monotonic effect + covariates + random
# intercepts).
loo_compare(m1, m2, m3, m4, m5, criterion = "waic")
## elpd_diff se_diff
## m4 0.0 0.0
## m2 -145.2 18.0
## m5 -1340.2 49.0
## m3 -1417.3 51.0
## m1 -1470.9 51.6